Project¶

In [2]:
import pandas as pd
import plotly
In [2]:
df = pd.read_csv(("C:\\Users\\sasikumarchennova\\Documents\\Data Science\\Excel\\project\\Employee_Attrition_with_missing.csv"))
In [113]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 26 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   employee_id               1470 non-null   int64  
 1   Age                       1470 non-null   float64
 2   BusinessTravel            1470 non-null   object 
 3   DailyRate                 1470 non-null   float64
 4   Department                1470 non-null   object 
 5   DistanceFromHome          1470 non-null   float64
 6   Education                 1470 non-null   object 
 7   EducationField            1470 non-null   object 
 8   EnvironmentSatisfaction   1470 non-null   object 
 9   Gender                    1470 non-null   object 
 10  HourlyRate                1470 non-null   float64
 11  JobInvolvement            1470 non-null   object 
 12  JobLevel                  1470 non-null   float64
 13  JobRole                   1470 non-null   object 
 14  JobSatisfaction           1470 non-null   object 
 15  MaritalStatus             1470 non-null   object 
 16  MonthlyIncome             1470 non-null   float64
 17  MonthlyRate               1470 non-null   float64
 18  NumCompaniesWorked        1470 non-null   float64
 19  OverTime                  1470 non-null   object 
 20  PercentSalaryHike         1470 non-null   float64
 21  PerformanceRating         1470 non-null   object 
 22  RelationshipSatisfaction  1470 non-null   object 
 23  TrainingTimesLastYear     1470 non-null   float64
 24  WorkLifeBalance           1470 non-null   object 
 25  Attrition                 1470 non-null   object 
dtypes: float64(10), int64(1), object(15)
memory usage: 298.7+ KB
In [114]:
df
Out[114]:
employee_id Age BusinessTravel DailyRate Department DistanceFromHome Education EducationField EnvironmentSatisfaction Gender ... MonthlyIncome MonthlyRate NumCompaniesWorked OverTime PercentSalaryHike PerformanceRating RelationshipSatisfaction TrainingTimesLastYear WorkLifeBalance Attrition
0 1453 41.0 Travel_Rarely 1102.0 Sales 1.0 2College Life Sciences 2Medium Female ... 5993.0 19479.0 8.0 Yes 11.0 3Excellent 1Low 0.0 1Bad Yes
1 1454 49.0 Travel_Frequently 279.0 Research & Development 8.0 1Below College Life Sciences 3High Male ... 5130.0 24907.0 1.0 No 23.0 4Outstanding 4Very High 3.0 3Better No
2 1455 37.0 Travel_Rarely 1373.0 Research & Development 2.0 2College Other 4Ver High Male ... 2090.0 2396.0 6.0 Yes 15.0 3Excellent 2Medium 3.0 3Better Yes
3 1456 33.0 Travel_Frequently 1392.0 Research & Development 3.0 4Master Life Sciences 4Ver High Female ... 2909.0 23159.0 1.0 Yes 11.0 3Excellent 3High 3.0 3Better No
4 1457 27.0 Travel_Rarely 591.0 Research & Development 2.0 1Below College Medical 1Low Male ... 3468.0 16632.0 9.0 No 12.0 3Excellent 4Very High 3.0 3Better No
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1465 2918 36.0 Travel_Frequently 884.0 Research & Development 23.0 2College Medical 3High Male ... 2571.0 12290.0 4.0 No 17.0 3Excellent 3High 3.0 3Better No
1466 2919 39.0 Travel_Rarely 613.0 Research & Development 6.0 1Below College Medical 4Ver High Male ... 9991.0 21457.0 4.0 No 15.0 3Excellent 1Low 5.0 3Better No
1467 2920 27.0 Travel_Rarely 155.0 Research & Development 4.0 3Bachelor Life Sciences 2Medium Male ... 6142.0 5174.0 1.0 Yes 20.0 4Outstanding 2Medium 0.0 3Better No
1468 2921 49.0 Travel_Frequently 1023.0 Sales 2.0 3Bachelor Medical 4Ver High Male ... 5390.0 13243.0 2.0 No 14.0 3Excellent 4Very High 3.0 2Good No
1469 2922 34.0 Travel_Rarely 628.0 Research & Development 8.0 3Bachelor Medical 2Medium Male ... 4404.0 10228.0 2.0 No 12.0 3Excellent 1Low 3.0 4Best No

1470 rows × 26 columns

Data Cleaning¶

In [4]:
df.columns
Out[4]:
Index(['employee_id', 'Age', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField',
       'EnvironmentSatisfaction', 'Gender', 'HourlyRate', 'JobInvolvement',
       'JobLevel', 'JobRole', 'JobSatisfaction', 'MaritalStatus',
       'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked', 'OverTime',
       'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction',
       'TrainingTimesLastYear', 'WorkLifeBalance', 'Attrition'],
      dtype='object')
In [124]:
def fillna_mode(data,variable): 
    data[variable].fillna(data[variable].mode()[0],inplace = True)
    
    return data[variable]
In [125]:
def fillna_mean(data, variable):
    mean_value = int(round(data[variable].mean(), 0))
    data[variable].fillna(mean_value, inplace=True)
    data[variable] = data[variable].astype(int)
    
    return data[variable]
In [126]:
emp_id = df['employee_id']
In [127]:
age = pd.DataFrame(fillna_mean(df,'Age'))
In [128]:
business_trvl = pd.DataFrame(fillna_mode(df,'BusinessTravel'))
In [129]:
daily_rate = pd.DataFrame(fillna_mean(df,'DailyRate'))
In [130]:
dept = pd.DataFrame(fillna_mode(df,'Department'))
In [131]:
dfh = pd.DataFrame(fillna_mean(df,'DistanceFromHome'))
In [132]:
education = pd.DataFrame(fillna_mode(df,'Education').str[1:])
In [133]:
edu_field = pd.DataFrame(fillna_mode(df,'EducationField'))
In [134]:
env_sat = pd.DataFrame(fillna_mode(df,'EnvironmentSatisfaction').str[1:])
In [135]:
gender = pd.DataFrame(fillna_mode(df,'Gender'))
In [136]:
hr_rate = pd.DataFrame(fillna_mean(df,'HourlyRate'))
In [137]:
job_inv = pd.DataFrame(fillna_mode(df,'JobInvolvement').str[1:])
In [138]:
job_level = pd.DataFrame(fillna_mean(df,'JobLevel'))
In [139]:
job_role = pd.DataFrame(fillna_mode(df,'JobRole'))
In [140]:
job_satis = pd.DataFrame(fillna_mode(df,'JobSatisfaction').str[1:])
In [141]:
mar_staus = pd.DataFrame(fillna_mode(df,'MaritalStatus'))
In [142]:
mon_inc = pd.DataFrame(fillna_mean(df,'MonthlyIncome'))
In [143]:
mon_rat = pd.DataFrame(fillna_mean(df,'MonthlyRate'))
In [144]:
no_com_wor = pd.DataFrame(fillna_mean(df,'NumCompaniesWorked'))
In [145]:
ovr_tm = pd.DataFrame(fillna_mode(df,'OverTime'))
In [146]:
per_sal_hike = pd.DataFrame(fillna_mean(df,'PercentSalaryHike'))
In [147]:
per_rat = pd.DataFrame(fillna_mode(df,'PerformanceRating').str[1:])
In [148]:
rela_sat = pd.DataFrame(fillna_mode(df,'RelationshipSatisfaction').str[1:])
In [149]:
tr_last_yr = pd.DataFrame(fillna_mean(df,'TrainingTimesLastYear'))
In [150]:
wrk_lf_baln = pd.DataFrame(fillna_mode(df,'WorkLifeBalance').str[1:])
In [151]:
attr = pd.DataFrame(fillna_mode(df,'Attrition'))
In [152]:
table = pd.concat([emp_id,age, business_trvl,daily_rate,dept,dfh,education,edu_field,env_sat,gender,hr_rate,job_inv,job_level,job_role,job_satis,
					mar_staus,mon_inc,mon_rat,no_com_wor,ovr_tm,per_sal_hike,per_rat,rela_sat,tr_last_yr,wrk_lf_baln,attr], axis=1)
In [153]:
table.columns
Out[153]:
Index(['employee_id', 'Age', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField',
       'EnvironmentSatisfaction', 'Gender', 'HourlyRate', 'JobInvolvement',
       'JobLevel', 'JobRole', 'JobSatisfaction', 'MaritalStatus',
       'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked', 'OverTime',
       'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction',
       'TrainingTimesLastYear', 'WorkLifeBalance', 'Attrition'],
      dtype='object')
In [154]:
table.to_csv('C:\\Users\\sasikumarchennova\\Documents\\Data Science\\Python\\project\\cleaned_table.csv',index= False)

Univariate Table and Diagram¶

In [3]:
df = pd.read_csv(("C:\\Users\\sasikumarchennova\\Documents\\Data Science\\Python\\project\\cleaned_table.csv"))
In [4]:
df
Out[4]:
employee_id Age BusinessTravel DailyRate Department DistanceFromHome Education EducationField EnvironmentSatisfaction Gender ... MonthlyIncome MonthlyRate NumCompaniesWorked OverTime PercentSalaryHike PerformanceRating RelationshipSatisfaction TrainingTimesLastYear WorkLifeBalance Attrition
0 1453 41 Travel_Rarely 1102 Sales 1 College Life Sciences Medium Female ... 5993 19479 8 Yes 11 Excellent Low 0 Bad Yes
1 1454 49 Travel_Frequently 279 Research & Development 8 Below College Life Sciences High Male ... 5130 24907 1 No 23 Outstanding Very High 3 Better No
2 1455 37 Travel_Rarely 1373 Research & Development 2 College Other Ver High Male ... 2090 2396 6 Yes 15 Excellent Medium 3 Better Yes
3 1456 33 Travel_Frequently 1392 Research & Development 3 Master Life Sciences Ver High Female ... 2909 23159 1 Yes 11 Excellent High 3 Better No
4 1457 27 Travel_Rarely 591 Research & Development 2 Below College Medical Low Male ... 3468 16632 9 No 12 Excellent Very High 3 Better No
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1465 2918 36 Travel_Frequently 884 Research & Development 23 College Medical High Male ... 2571 12290 4 No 17 Excellent High 3 Better No
1466 2919 39 Travel_Rarely 613 Research & Development 6 Below College Medical Ver High Male ... 9991 21457 4 No 15 Excellent Low 5 Better No
1467 2920 27 Travel_Rarely 155 Research & Development 4 Bachelor Life Sciences Medium Male ... 6142 5174 1 Yes 20 Outstanding Medium 0 Better No
1468 2921 49 Travel_Frequently 1023 Sales 2 Bachelor Medical Ver High Male ... 5390 13243 2 No 14 Excellent Very High 3 Good No
1469 2922 34 Travel_Rarely 628 Research & Development 8 Bachelor Medical Medium Male ... 4404 10228 2 No 12 Excellent Low 3 Best No

1470 rows × 26 columns

In [5]:
df.columns
Out[5]:
Index(['employee_id', 'Age', 'BusinessTravel', 'DailyRate', 'Department',
       'DistanceFromHome', 'Education', 'EducationField',
       'EnvironmentSatisfaction', 'Gender', 'HourlyRate', 'JobInvolvement',
       'JobLevel', 'JobRole', 'JobSatisfaction', 'MaritalStatus',
       'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked', 'OverTime',
       'PercentSalaryHike', 'PerformanceRating', 'RelationshipSatisfaction',
       'TrainingTimesLastYear', 'WorkLifeBalance', 'Attrition'],
      dtype='object')
In [6]:
import pandas as pd
import plotly.express as px

def univariate_table_diagram(df, variable, title):
    table = pd.DataFrame(df.groupby([variable]).size().reset_index())
    table.columns = ['Category', 'Counts']
    table['Percentage'] = round(table['Counts'] / sum(table['Counts']) * 100, 0)
    table['Percentage_with_sign'] = table['Percentage'].astype(str) + "%"
    
    fig = px.bar(table,
                 x='Category',
                 y='Counts',
                 title=title,
                 text=table['Percentage_with_sign'])
    fig.update_layout(template='plotly_white', title_x=.5)
    fig.show()

    return table
In [7]:
pd.DataFrame(univariate_table_diagram(df,'BusinessTravel','Business Travel '))
Out[7]:
Category Counts Percentage Percentage_with_sign
0 Non-Travel 148 10.0 10.0%
1 Travel_Frequently 273 19.0 19.0%
2 Travel_Rarely 1049 71.0 71.0%
In [8]:
pd.DataFrame(univariate_table_diagram(df,'Department','Department'))
Out[8]:
Category Counts Percentage Percentage_with_sign
0 Human Resources 62 4.0 4.0%
1 Research & Development 966 66.0 66.0%
2 Sales 442 30.0 30.0%
In [9]:
pd.DataFrame(univariate_table_diagram(df,'Education','Education'))
Out[9]:
Category Counts Percentage Percentage_with_sign
0 Bachelor 583 40.0 40.0%
1 Below College 169 11.0 11.0%
2 College 279 19.0 19.0%
3 Doctor 48 3.0 3.0%
4 Master 391 27.0 27.0%
In [10]:
pd.DataFrame(univariate_table_diagram(df,'EducationField','Education Field'))
Out[10]:
Category Counts Percentage Percentage_with_sign
0 Human Resources 27 2.0 2.0%
1 Life Sciences 613 42.0 42.0%
2 Marketing 157 11.0 11.0%
3 Medical 459 31.0 31.0%
4 Other 82 6.0 6.0%
5 Technical Degree 132 9.0 9.0%
In [11]:
pd.DataFrame(univariate_table_diagram(df,'EnvironmentSatisfaction','Environment Satisfaction'))
Out[11]:
Category Counts Percentage Percentage_with_sign
0 High 468 32.0 32.0%
1 Low 279 19.0 19.0%
2 Medium 282 19.0 19.0%
3 Ver High 441 30.0 30.0%
In [12]:
pd.DataFrame(univariate_table_diagram(df,'Gender','Gender'))
Out[12]:
Category Counts Percentage Percentage_with_sign
0 Female 583 40.0 40.0%
1 Male 887 60.0 60.0%
In [13]:
pd.DataFrame(univariate_table_diagram(df,'JobInvolvement','Job Involvement'))
Out[13]:
Category Counts Percentage Percentage_with_sign
0 High 873 59.0 59.0%
1 Low 81 6.0 6.0%
2 Medium 372 25.0 25.0%
3 Very High 144 10.0 10.0%
In [14]:
pd.DataFrame(univariate_table_diagram(df,'JobRole','Job Role'))
Out[14]:
Category Counts Percentage Percentage_with_sign
0 Healthcare Representative 130 9.0 9.0%
1 Human Resources 52 4.0 4.0%
2 Laboratory Technician 256 17.0 17.0%
3 Manager 102 7.0 7.0%
4 Manufacturing Director 144 10.0 10.0%
5 Research Director 79 5.0 5.0%
6 Research Scientist 290 20.0 20.0%
7 Sales Executive 335 23.0 23.0%
8 Sales Representative 82 6.0 6.0%
In [15]:
pd.DataFrame(univariate_table_diagram(df,'JobSatisfaction','Job Satisfaction'))
Out[15]:
Category Counts Percentage Percentage_with_sign
0 High 440 30.0 30.0%
1 Low 286 19.0 19.0%
2 Medium 278 19.0 19.0%
3 Very High 466 32.0 32.0%
In [16]:
pd.DataFrame(univariate_table_diagram(df,'MaritalStatus','Marital Status'))
Out[16]:
Category Counts Percentage Percentage_with_sign
0 Divorced 326 22.0 22.0%
1 Married 680 46.0 46.0%
2 Single 464 32.0 32.0%
In [17]:
pd.DataFrame(univariate_table_diagram(df,'PerformanceRating','Performance Rating'))
Out[17]:
Category Counts Percentage Percentage_with_sign
0 Excellent 1248 85.0 85.0%
1 Outstanding 222 15.0 15.0%
In [18]:
pd.DataFrame(univariate_table_diagram(df,'RelationshipSatisfaction','Relationship Satisfaction'))
Out[18]:
Category Counts Percentage Percentage_with_sign
0 High 467 32.0 32.0%
1 Low 274 19.0 19.0%
2 Medium 301 20.0 20.0%
3 Very High 428 29.0 29.0%
In [19]:
pd.DataFrame(univariate_table_diagram(df,'WorkLifeBalance','Work Life Balance'))
Out[19]:
Category Counts Percentage Percentage_with_sign
0 Bad 80 5.0 5.0%
1 Best 153 10.0 10.0%
2 Better 896 61.0 61.0%
3 Good 341 23.0 23.0%

Pie Chart¶

In [20]:
import pandas as pd
import plotly.express as px

def univariate_pie_chart(df, variable, title):
    table = pd.DataFrame(df.groupby([variable]).size().reset_index())
    table.columns = ['Category', 'Counts']
    table['Percentage'] = round(table['Counts'] / sum(table['Counts']) * 100, 0)
    table['Percentage_with_sign'] = table['Percentage'].astype(str) + "%"
    
    fig = px.pie(table, values='Counts', names='Category', title=title)
    fig.update_layout(template='seaborn', title_x=0.5)
    fig.show()
    
    return table
In [21]:
pd.DataFrame(univariate_pie_chart(df,'WorkLifeBalance','Work Life Balance'))
Out[21]:
Category Counts Percentage Percentage_with_sign
0 Bad 80 5.0 5.0%
1 Best 153 10.0 10.0%
2 Better 896 61.0 61.0%
3 Good 341 23.0 23.0%
In [22]:
pd.DataFrame(univariate_pie_chart(df,'Education','Education'))
Out[22]:
Category Counts Percentage Percentage_with_sign
0 Bachelor 583 40.0 40.0%
1 Below College 169 11.0 11.0%
2 College 279 19.0 19.0%
3 Doctor 48 3.0 3.0%
4 Master 391 27.0 27.0%
In [23]:
pd.DataFrame(univariate_pie_chart(df,'MaritalStatus','Marital Status'))
Out[23]:
Category Counts Percentage Percentage_with_sign
0 Divorced 326 22.0 22.0%
1 Married 680 46.0 46.0%
2 Single 464 32.0 32.0%
In [24]:
pd.DataFrame(univariate_pie_chart(df,'PerformanceRating','Performance Rating'))
Out[24]:
Category Counts Percentage Percentage_with_sign
0 Excellent 1248 85.0 85.0%
1 Outstanding 222 15.0 15.0%
In [25]:
pd.DataFrame(univariate_pie_chart(df,'JobRole','Job Role'))
Out[25]:
Category Counts Percentage Percentage_with_sign
0 Healthcare Representative 130 9.0 9.0%
1 Human Resources 52 4.0 4.0%
2 Laboratory Technician 256 17.0 17.0%
3 Manager 102 7.0 7.0%
4 Manufacturing Director 144 10.0 10.0%
5 Research Director 79 5.0 5.0%
6 Research Scientist 290 20.0 20.0%
7 Sales Executive 335 23.0 23.0%
8 Sales Representative 82 6.0 6.0%

Grouping and Bar Chart¶

In [26]:
def group(x):
    
    if x <18:
        status=  "< 18"
    elif x<24:
        status = "19-24"
    elif x<34:
        status = "25-34"
    elif x<44:
        status = "35-44"
    elif x<54:
        status = "45-54"
    else:
        status = "55 +"
    
    return status
In [27]:
df['Age_group'] = df['Age'].apply(group)
In [28]:
import pandas as pd
import plotly.express as px

def univariate_table_diagram(df, variable, title):
    table = pd.DataFrame(df.groupby([variable]).size().reset_index())
    table.columns = ['Category', 'Counts']
    table['Percentage'] = round(table['Counts'] / sum(table['Counts']) * 100, 0)
    table['Percentage_with_sign'] = table['Percentage'].astype(str) + "%"
    
    fig = px.bar(table,
                 x='Category',
                 y='Counts',
                 title=title,
                 text=table['Percentage_with_sign'].astype(str),color='Category')
    fig.update_layout(template='plotly_white', title_x=0.5).show()    
    return table
In [29]:
pd.DataFrame(univariate_table_diagram(df,'Age_group','Age'))
Out[29]:
Category Counts Percentage Percentage_with_sign
0 19-24 71 5.0 5.0%
1 25-34 496 34.0 34.0%
2 35-44 559 38.0 38.0%
3 45-54 257 17.0 17.0%
4 55 + 87 6.0 6.0%
In [30]:
import pandas as pd
import plotly.express as px

def univariate_table_diagram(df, variable, title):
    table = pd.DataFrame(df.groupby([variable]).size().reset_index())
    table.columns = ['Category', 'Counts']
    table['Percentage'] = round(table['Counts'] / sum(table['Counts']) * 100, 0)
    table['Percentage_with_sign'] = table['Percentage'].astype(str) + "%"
    
    ##table = table.sort_values('Counts', ascending=False)  # Sort by Counts in descending order
    
    fig = px.bar(table,
                 x='Category',
                 y='Counts',
                 title=title,
                 text=table['Percentage_with_sign'].astype(str),
                 color='Category')
    fig.update_layout(template='plotly_white', title_x=0.5).show()    
    return table
In [31]:
import numpy as np
import pandas as pd

def categorize_daily_rate(daily_rate):
    if daily_rate <= 200:
        return "<200"
    elif daily_rate <= 499:
        return "201-499"
    elif daily_rate <= 897:
        return "500-897"
    elif daily_rate <= 1296:
        return "898-1296"
    elif daily_rate <= 1697:
        return "1297-1693"
    else:
        return ">1694"

df['DailyRate_Group'] = df['DailyRate'].apply(categorize_daily_rate)
df['DailyRate_Group'] = pd.Categorical(df['DailyRate_Group'], categories=['<200', '201-499', '500-897', '898-1296', '1297-1693', '>1694'], ordered=True)
In [32]:
pd.DataFrame(univariate_table_diagram(df,'DailyRate_Group','Daily Rate'))
Out[32]:
Category Counts Percentage Percentage_with_sign
0 <200 106 7.0 7.0%
1 201-499 295 20.0 20.0%
2 500-897 441 30.0 30.0%
3 898-1296 406 28.0 28.0%
4 1297-1693 221 15.0 15.0%
5 >1694 1 0.0 0.0%
In [33]:
import numpy as np
import pandas as pd

def categorize_distance_f_h(dfh):
    if dfh <= 5:
        return "<5 KM"
    elif dfh <= 10:
        return "6-10 KM"
    elif dfh <= 15:
        return "11-15 KM"
    elif dfh <= 20:
        return "16-20 KM"
    elif dfh <= 25:
        return "21-25 KM"
    else:
        return "25 + KM"
    
df['Distance_Group'] = df['DistanceFromHome'].apply(categorize_distance_f_h)
df['Distance_Group'] = pd.Categorical(df['Distance_Group'], categories=['<5 KM', '6-10 KM', '11-15 KM', '16-20 KM', '21-25 KM', '25 + KM'], ordered=True)
In [34]:
pd.DataFrame(univariate_table_diagram(df,'Distance_Group','Distance From Home'))
Out[34]:
Category Counts Percentage Percentage_with_sign
0 <5 KM 624 42.0 42.0%
1 6-10 KM 406 28.0 28.0%
2 11-15 KM 114 8.0 8.0%
3 16-20 KM 123 8.0 8.0%
4 21-25 KM 116 8.0 8.0%
5 25 + KM 87 6.0 6.0%
In [35]:
import numpy as np
import pandas as pd

def categorize_mon_income(monthly_income):
    if monthly_income <= 6000:
        return "< 6000 Monthly Income"
    elif monthly_income <= 11000:
        return "6001 - 10999 Monthly Income"
    elif monthly_income <= 16000:
        return "11001-16000 Monthly Income"
    elif monthly_income <= 21000:
        return "16001-21000 Monthly Income"
    else:
        return "21000 + Monthly Income"
    
df['MonthlyIncome_Group'] = df['MonthlyIncome'].apply(categorize_mon_income)
df['MonthlyIncome_Group'] = pd.Categorical(df['MonthlyIncome_Group'], categories=['< 6000 Monthly Income', '6001 - 10999 Monthly Income', '11001-16000 Monthly Income', '16001-21000 Monthly Income', '21000 + Monthly Income'], ordered=True)
In [36]:
pd.DataFrame(univariate_table_diagram(df,'MonthlyIncome_Group','Monthly Income'))
Out[36]:
Category Counts Percentage Percentage_with_sign
0 < 6000 Monthly Income 906 62.0 62.0%
1 6001 - 10999 Monthly Income 349 24.0 24.0%
2 11001-16000 Monthly Income 89 6.0 6.0%
3 16001-21000 Monthly Income 126 9.0 9.0%
4 21000 + Monthly Income 0 0.0 0.0%
In [37]:
import numpy as np
import pandas as pd

def categorize_mon_rate(monthly_rate):
    if monthly_rate <= 10000:
        return "< 10000 Monthly Rate"
    elif monthly_rate <= 18000:
        return "10001 - 18000 Monthly Rate"
    elif monthly_rate <= 26000:
        return "18001-26000 Monthly Rate"
    elif monthly_rate <= 34000:
        return "26001-34000 Monthly Rate"
    elif monthly_rate <=58000:
        return "34001 - 58000 Monthly Rate"
    else:
        return "58001 + Monthly Rate"
    
df['MonthlyRate_Group'] = df['MonthlyRate'].apply(categorize_mon_rate)
df['MonthlyRate_Group'] = pd.Categorical(df['MonthlyRate_Group'], categories=['< 10000 Monthly Rate', '10001 - 18000 Monthly Rate', '18001-26000 Monthly Rate', '26001-34000 Monthly Rate', '34001 - 58000 Monthly Rate','58001 + Monthly Rate'], ordered=True)
In [38]:
pd.DataFrame(univariate_table_diagram(df,'MonthlyRate_Group','MonthlyRate Income'))
Out[38]:
Category Counts Percentage Percentage_with_sign
0 < 10000 Monthly Rate 485 33.0 33.0%
1 10001 - 18000 Monthly Rate 474 32.0 32.0%
2 18001-26000 Monthly Rate 461 31.0 31.0%
3 26001-34000 Monthly Rate 49 3.0 3.0%
4 34001 - 58000 Monthly Rate 1 0.0 0.0%
5 58001 + Monthly Rate 0 0.0 0.0%
In [39]:
import numpy as np
import pandas as pd

def categorize_company_work(work):
    if work <= 1:
        return "< 1 Company"
    elif work <= 3:
        return "2-3 Company"
    elif work <= 5:
        return "4-5 Company"
    elif work <= 7:
        return "6-7 Company"
    elif work <=9:
        return "8-9 Company"
    else:
        return "More than 9 Company"
    
df['NumCompaniesWorked_Group'] = df['NumCompaniesWorked'].apply(categorize_company_work)
df['NumCompaniesWorked_Group'] = pd.Categorical(df['NumCompaniesWorked_Group'], categories=['< 1 Company', '2-3 Company', '4-5 Company', '6-7 Company', '8-9 Company','More than 9 Company'], ordered=True)
In [40]:
pd.DataFrame(univariate_table_diagram(df,'NumCompaniesWorked_Group','Num Companies Worked'))
Out[40]:
Category Counts Percentage Percentage_with_sign
0 < 1 Company 711 48.0 48.0%
1 2-3 Company 316 21.0 21.0%
2 4-5 Company 201 14.0 14.0%
3 6-7 Company 142 10.0 10.0%
4 8-9 Company 100 7.0 7.0%
5 More than 9 Company 0 0.0 0.0%
In [41]:
import numpy as np
import pandas as pd

def categorize_salary_hike(hike):
    if hike <= 13:
        return "< 13 percentage %"
    elif hike <= 16:
        return "14-16 percentage %"
    elif hike <= 19:
        return "17-19 percentage %"
    elif hike <= 22:
        return "20-22 percentage %"
    else:
        return "22 + percentage %"
    
df['PercentSalaryHike_Group'] = df['PercentSalaryHike'].apply(categorize_salary_hike)
df['PercentSalaryHike_Group'] = pd.Categorical(df['PercentSalaryHike_Group'], categories=['< 13 percentage %', '14-16 percentage %', '17-19 percentage %', '20-22 percentage %', '22 + percentage %'], ordered=True)
In [42]:
pd.DataFrame(univariate_table_diagram(df,'PercentSalaryHike_Group','Percent Salary Hike'))
Out[42]:
Category Counts Percentage Percentage_with_sign
0 < 13 percentage % 610 41.0 41.0%
1 14-16 percentage % 393 27.0 27.0%
2 17-19 percentage % 244 17.0 17.0%
3 20-22 percentage % 159 11.0 11.0%
4 22 + percentage % 64 4.0 4.0%

Bivariate table and Chart¶

In [43]:
df
Out[43]:
employee_id Age BusinessTravel DailyRate Department DistanceFromHome Education EducationField EnvironmentSatisfaction Gender ... TrainingTimesLastYear WorkLifeBalance Attrition Age_group DailyRate_Group Distance_Group MonthlyIncome_Group MonthlyRate_Group NumCompaniesWorked_Group PercentSalaryHike_Group
0 1453 41 Travel_Rarely 1102 Sales 1 College Life Sciences Medium Female ... 0 Bad Yes 35-44 898-1296 <5 KM < 6000 Monthly Income 18001-26000 Monthly Rate 8-9 Company < 13 percentage %
1 1454 49 Travel_Frequently 279 Research & Development 8 Below College Life Sciences High Male ... 3 Better No 45-54 201-499 6-10 KM < 6000 Monthly Income 18001-26000 Monthly Rate < 1 Company 22 + percentage %
2 1455 37 Travel_Rarely 1373 Research & Development 2 College Other Ver High Male ... 3 Better Yes 35-44 1297-1693 <5 KM < 6000 Monthly Income < 10000 Monthly Rate 6-7 Company 14-16 percentage %
3 1456 33 Travel_Frequently 1392 Research & Development 3 Master Life Sciences Ver High Female ... 3 Better No 25-34 1297-1693 <5 KM < 6000 Monthly Income 18001-26000 Monthly Rate < 1 Company < 13 percentage %
4 1457 27 Travel_Rarely 591 Research & Development 2 Below College Medical Low Male ... 3 Better No 25-34 500-897 <5 KM < 6000 Monthly Income 10001 - 18000 Monthly Rate 8-9 Company < 13 percentage %
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1465 2918 36 Travel_Frequently 884 Research & Development 23 College Medical High Male ... 3 Better No 35-44 500-897 21-25 KM < 6000 Monthly Income 10001 - 18000 Monthly Rate 4-5 Company 17-19 percentage %
1466 2919 39 Travel_Rarely 613 Research & Development 6 Below College Medical Ver High Male ... 5 Better No 35-44 500-897 6-10 KM 6001 - 10999 Monthly Income 18001-26000 Monthly Rate 4-5 Company 14-16 percentage %
1467 2920 27 Travel_Rarely 155 Research & Development 4 Bachelor Life Sciences Medium Male ... 0 Better No 25-34 <200 <5 KM 6001 - 10999 Monthly Income < 10000 Monthly Rate < 1 Company 20-22 percentage %
1468 2921 49 Travel_Frequently 1023 Sales 2 Bachelor Medical Ver High Male ... 3 Good No 45-54 898-1296 <5 KM < 6000 Monthly Income 10001 - 18000 Monthly Rate 2-3 Company 14-16 percentage %
1469 2922 34 Travel_Rarely 628 Research & Development 8 Bachelor Medical Medium Male ... 3 Best No 35-44 500-897 6-10 KM < 6000 Monthly Income 10001 - 18000 Monthly Rate 2-3 Company < 13 percentage %

1470 rows × 33 columns

In [66]:
def bivariate_table(df, row, column):
    table = df.groupby([row, column]).size().reset_index()
    table['percentage'] = df.groupby([row, column]).size().groupby(level=0).apply(lambda x: 100 * x / float(x.sum())).values.round(1)
    table.columns = [row, column, 'Counts', 'Percentage']
    table['Percentage'] = table['Percentage'].astype(str) + '%'
    return table
In [67]:
bivariate_table(df,'BusinessTravel','Attrition')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\89530821.py:3: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[67]:
BusinessTravel Attrition Counts Percentage
0 Non-Travel No 137 92.6%
1 Non-Travel Yes 11 7.4%
2 Travel_Frequently No 205 75.1%
3 Travel_Frequently Yes 68 24.9%
4 Travel_Rarely No 891 84.9%
5 Travel_Rarely Yes 158 15.1%
In [69]:
import pandas as pd
import plotly.express as px

def create_bivariate_stacked_bar_chart(df, row, column, title, xaxis_title, yaxis_title):
    def bivariate_table(df, row, column):
        table = df.groupby([row, column]).size().reset_index()
        table['percentage'] = df.groupby([row, column]).size().groupby(level=0).apply(lambda x: 100 * x / float(x.sum())).values.round(1)
        table.columns = [row, column, 'Counts', 'Percentage']
        table['Percent'] = table['Percentage'].astype(str) + '%'
        return table
    
    table = bivariate_table(df, row, column)

    fig = px.bar(table, x=row, y='Percentage', color=column, barmode='stack', text=table['Percentage'])
    fig.update_layout(title=title, xaxis_title=xaxis_title, yaxis_title=yaxis_title, width=800, height=600)

    fig.show()
    
    return table
In [70]:
create_bivariate_stacked_bar_chart(df,'BusinessTravel','Attrition','Business Travel vs Attrition', 'Business Travel', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[70]:
BusinessTravel Attrition Counts Percentage Percent
0 Non-Travel No 137 92.6 92.6%
1 Non-Travel Yes 11 7.4 7.4%
2 Travel_Frequently No 205 75.1 75.1%
3 Travel_Frequently Yes 68 24.9 24.9%
4 Travel_Rarely No 891 84.9 84.9%
5 Travel_Rarely Yes 158 15.1 15.1%
In [71]:
create_bivariate_stacked_bar_chart(df,'Age_group','Attrition','Age vs Attrition', 'Age', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[71]:
Age_group Attrition Counts Percentage Percent
0 19-24 No 40 56.3 56.3%
1 19-24 Yes 31 43.7 43.7%
2 25-34 No 389 78.4 78.4%
3 25-34 Yes 107 21.6 21.6%
4 35-44 No 502 89.8 89.8%
5 35-44 Yes 57 10.2 10.2%
6 45-54 No 226 87.9 87.9%
7 45-54 Yes 31 12.1 12.1%
8 55 + No 76 87.4 87.4%
9 55 + Yes 11 12.6 12.6%
In [72]:
create_bivariate_stacked_bar_chart(df,'DailyRate_Group','Attrition','Daily Rate vs Attrition', 'Daily Rate', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[72]:
DailyRate_Group Attrition Counts Percentage Percent
0 <200 No 93 87.7 87.7%
1 <200 Yes 13 12.3 12.3%
2 201-499 No 232 78.6 78.6%
3 201-499 Yes 63 21.4 21.4%
4 500-897 No 366 83.0 83.0%
5 500-897 Yes 75 17.0 17.0%
6 898-1296 No 353 86.9 86.9%
7 898-1296 Yes 53 13.1 13.1%
8 1297-1693 No 188 85.1 85.1%
9 1297-1693 Yes 33 14.9 14.9%
10 >1694 No 1 100.0 100.0%
11 >1694 Yes 0 0.0 0.0%
In [73]:
create_bivariate_stacked_bar_chart(df,'Distance_Group','Attrition','Distance From Home vs Attrition', 'Distance From Home', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[73]:
Distance_Group Attrition Counts Percentage Percent
0 <5 KM No 539 86.4 86.4%
1 <5 KM Yes 85 13.6 13.6%
2 6-10 KM No 346 85.2 85.2%
3 6-10 KM Yes 60 14.8 14.8%
4 11-15 KM No 89 78.1 78.1%
5 11-15 KM Yes 25 21.9 21.9%
6 16-20 KM No 101 82.1 82.1%
7 16-20 KM Yes 22 17.9 17.9%
8 21-25 KM No 84 72.4 72.4%
9 21-25 KM Yes 32 27.6 27.6%
10 25 + KM No 74 85.1 85.1%
11 25 + KM Yes 13 14.9 14.9%
In [74]:
create_bivariate_stacked_bar_chart(df,'MonthlyIncome_Group','Attrition','Monthly Income vs Attrition', 'Monthly Income', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[74]:
MonthlyIncome_Group Attrition Counts Percentage Percent
0 < 6000 Monthly Income No 727 80.2 80.2%
1 < 6000 Monthly Income Yes 179 19.8 19.8%
2 6001 - 10999 Monthly Income No 303 86.8 86.8%
3 6001 - 10999 Monthly Income Yes 46 13.2 13.2%
4 11001-16000 Monthly Income No 82 92.1 92.1%
5 11001-16000 Monthly Income Yes 7 7.9 7.9%
6 16001-21000 Monthly Income No 121 96.0 96.0%
7 16001-21000 Monthly Income Yes 5 4.0 4.0%
8 21000 + Monthly Income No 0 NaN nan%
9 21000 + Monthly Income Yes 0 NaN nan%
In [75]:
create_bivariate_stacked_bar_chart(df,'MonthlyRate_Group','Attrition','Monthly Rate vs Attrition', 'Monthly Rate', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[75]:
MonthlyRate_Group Attrition Counts Percentage Percent
0 < 10000 Monthly Rate No 414 85.4 85.4%
1 < 10000 Monthly Rate Yes 71 14.6 14.6%
2 10001 - 18000 Monthly Rate No 393 82.9 82.9%
3 10001 - 18000 Monthly Rate Yes 81 17.1 17.1%
4 18001-26000 Monthly Rate No 387 83.9 83.9%
5 18001-26000 Monthly Rate Yes 74 16.1 16.1%
6 26001-34000 Monthly Rate No 39 79.6 79.6%
7 26001-34000 Monthly Rate Yes 10 20.4 20.4%
8 34001 - 58000 Monthly Rate No 0 0.0 0.0%
9 34001 - 58000 Monthly Rate Yes 1 100.0 100.0%
10 58001 + Monthly Rate No 0 NaN nan%
11 58001 + Monthly Rate Yes 0 NaN nan%
In [76]:
create_bivariate_stacked_bar_chart(df,'NumCompaniesWorked_Group','Attrition','Num Companies Worked vs Attrition', 'Num Companies Worked ', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[76]:
NumCompaniesWorked_Group Attrition Counts Percentage Percent
0 < 1 Company No 591 83.1 83.1%
1 < 1 Company Yes 120 16.9 16.9%
2 2-3 Company No 282 89.2 89.2%
3 2-3 Company Yes 34 10.8 10.8%
4 4-5 Company No 168 83.6 83.6%
5 4-5 Company Yes 33 16.4 16.4%
6 6-7 Company No 109 76.8 76.8%
7 6-7 Company Yes 33 23.2 23.2%
8 8-9 Company No 83 83.0 83.0%
9 8-9 Company Yes 17 17.0 17.0%
10 More than 9 Company No 0 NaN nan%
11 More than 9 Company Yes 0 NaN nan%
In [77]:
create_bivariate_stacked_bar_chart(df,'PercentSalaryHike_Group','Attrition','Salary Hike vs Attrition', 'Salary Hike', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[77]:
PercentSalaryHike_Group Attrition Counts Percentage Percent
0 < 13 percentage % No 503 82.5 82.5%
1 < 13 percentage % Yes 107 17.5 17.5%
2 14-16 percentage % No 335 85.2 85.2%
3 14-16 percentage % Yes 58 14.8 14.8%
4 17-19 percentage % No 209 85.7 85.7%
5 17-19 percentage % Yes 35 14.3 14.3%
6 20-22 percentage % No 135 84.9 84.9%
7 20-22 percentage % Yes 24 15.1 15.1%
8 22 + percentage % No 51 79.7 79.7%
9 22 + percentage % Yes 13 20.3 20.3%
In [78]:
create_bivariate_stacked_bar_chart(df,'Department','Attrition','Departmentvs Attrition', 'Department', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[78]:
Department Attrition Counts Percentage Percent
0 Human Resources No 50 80.6 80.6%
1 Human Resources Yes 12 19.4 19.4%
2 Research & Development No 832 86.1 86.1%
3 Research & Development Yes 134 13.9 13.9%
4 Sales No 351 79.4 79.4%
5 Sales Yes 91 20.6 20.6%
In [79]:
create_bivariate_stacked_bar_chart(df,'Education','Attrition','Education Attrition', 'Education', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[79]:
Education Attrition Counts Percentage Percent
0 Bachelor No 482 82.7 82.7%
1 Bachelor Yes 101 17.3 17.3%
2 Below College No 139 82.2 82.2%
3 Below College Yes 30 17.8 17.8%
4 College No 235 84.2 84.2%
5 College Yes 44 15.8 15.8%
6 Doctor No 43 89.6 89.6%
7 Doctor Yes 5 10.4 10.4%
8 Master No 334 85.4 85.4%
9 Master Yes 57 14.6 14.6%
In [80]:
create_bivariate_stacked_bar_chart(df,'EducationField','Attrition','Education Field vs Attrition', 'Education Field', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[80]:
EducationField Attrition Counts Percentage Percent
0 Human Resources No 20 74.1 74.1%
1 Human Resources Yes 7 25.9 25.9%
2 Life Sciences No 522 85.2 85.2%
3 Life Sciences Yes 91 14.8 14.8%
4 Marketing No 123 78.3 78.3%
5 Marketing Yes 34 21.7 21.7%
6 Medical No 397 86.5 86.5%
7 Medical Yes 62 13.5 13.5%
8 Other No 71 86.6 86.6%
9 Other Yes 11 13.4 13.4%
10 Technical Degree No 100 75.8 75.8%
11 Technical Degree Yes 32 24.2 24.2%
In [81]:
create_bivariate_stacked_bar_chart(df,'EnvironmentSatisfaction','Attrition','Environment Satisfaction vs Attrition', 'Environment Satisfaction', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[81]:
EnvironmentSatisfaction Attrition Counts Percentage Percent
0 High No 403 86.1 86.1%
1 High Yes 65 13.9 13.9%
2 Low No 208 74.6 74.6%
3 Low Yes 71 25.4 25.4%
4 Medium No 240 85.1 85.1%
5 Medium Yes 42 14.9 14.9%
6 Ver High No 382 86.6 86.6%
7 Ver High Yes 59 13.4 13.4%
In [82]:
create_bivariate_stacked_bar_chart(df,'Gender','Attrition','Gender vs Attrition', 'Gender', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[82]:
Gender Attrition Counts Percentage Percent
0 Female No 496 85.1 85.1%
1 Female Yes 87 14.9 14.9%
2 Male No 737 83.1 83.1%
3 Male Yes 150 16.9 16.9%
In [83]:
create_bivariate_stacked_bar_chart(df,'JobInvolvement','Attrition','JobInvolvement vs Attrition', 'JobInvolvement', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[83]:
JobInvolvement Attrition Counts Percentage Percent
0 High No 746 85.5 85.5%
1 High Yes 127 14.5 14.5%
2 Low No 53 65.4 65.4%
3 Low Yes 28 34.6 34.6%
4 Medium No 303 81.5 81.5%
5 Medium Yes 69 18.5 18.5%
6 Very High No 131 91.0 91.0%
7 Very High Yes 13 9.0 9.0%
In [84]:
create_bivariate_stacked_bar_chart(df,'JobRole','Attrition','JobRole vs Attrition', 'JobRole', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[84]:
JobRole Attrition Counts Percentage Percent
0 Healthcare Representative No 121 93.1 93.1%
1 Healthcare Representative Yes 9 6.9 6.9%
2 Human Resources No 40 76.9 76.9%
3 Human Resources Yes 12 23.1 23.1%
4 Laboratory Technician No 194 75.8 75.8%
5 Laboratory Technician Yes 62 24.2 24.2%
6 Manager No 97 95.1 95.1%
7 Manager Yes 5 4.9 4.9%
8 Manufacturing Director No 134 93.1 93.1%
9 Manufacturing Director Yes 10 6.9 6.9%
10 Research Director No 77 97.5 97.5%
11 Research Director Yes 2 2.5 2.5%
12 Research Scientist No 245 84.5 84.5%
13 Research Scientist Yes 45 15.5 15.5%
14 Sales Executive No 276 82.4 82.4%
15 Sales Executive Yes 59 17.6 17.6%
16 Sales Representative No 49 59.8 59.8%
17 Sales Representative Yes 33 40.2 40.2%
In [85]:
create_bivariate_stacked_bar_chart(df,'MaritalStatus','Attrition','MaritalStatus vs Attrition', 'MaritalStatus', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[85]:
MaritalStatus Attrition Counts Percentage Percent
0 Divorced No 293 89.9 89.9%
1 Divorced Yes 33 10.1 10.1%
2 Married No 593 87.2 87.2%
3 Married Yes 87 12.8 12.8%
4 Single No 347 74.8 74.8%
5 Single Yes 117 25.2 25.2%
In [86]:
create_bivariate_stacked_bar_chart(df,'PerformanceRating','Attrition','PerformanceRating vs Attrition', 'PerformanceRating', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[86]:
PerformanceRating Attrition Counts Percentage Percent
0 Excellent No 1048 84.0 84.0%
1 Excellent Yes 200 16.0 16.0%
2 Outstanding No 185 83.3 83.3%
3 Outstanding Yes 37 16.7 16.7%
In [87]:
create_bivariate_stacked_bar_chart(df,'RelationshipSatisfaction','Attrition','Relationship Satisfaction vs Attrition', 'Relationship Satisfaction', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[87]:
RelationshipSatisfaction Attrition Counts Percentage Percent
0 High No 395 84.6 84.6%
1 High Yes 72 15.4 15.4%
2 Low No 217 79.2 79.2%
3 Low Yes 57 20.8 20.8%
4 Medium No 256 85.0 85.0%
5 Medium Yes 45 15.0 15.0%
6 Very High No 365 85.3 85.3%
7 Very High Yes 63 14.7 14.7%
In [88]:
create_bivariate_stacked_bar_chart(df,'WorkLifeBalance','Attrition','WorkLife Balance vs Attrition', 'WorkLife Balance', 'Pecentage %')
C:\Users\sasikumarchennova\AppData\Local\Temp\ipykernel_19564\4069780291.py:7: FutureWarning:

Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use 

	>>> .groupby(..., group_keys=True)

Out[88]:
WorkLifeBalance Attrition Counts Percentage Percent
0 Bad No 55 68.8 68.8%
1 Bad Yes 25 31.2 31.2%
2 Best No 126 82.4 82.4%
3 Best Yes 27 17.6 17.6%
4 Better No 769 85.8 85.8%
5 Better Yes 127 14.2 14.2%
6 Good No 283 83.0 83.0%
7 Good Yes 58 17.0 17.0%
In [ ]: